Load libraries
library('readr')
library('dplyr')
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library("tidylog")
##
## Attaching package: 'tidylog'
## The following objects are masked from 'package:dplyr':
##
## add_count, add_tally, anti_join, count, distinct,
## distinct_all, distinct_at, distinct_if, filter, filter_all,
## filter_at, filter_if, full_join, group_by, group_by_all,
## group_by_at, group_by_if, inner_join, left_join, mutate,
## mutate_all, mutate_at, mutate_if, right_join, select,
## select_all, select_at, select_if, semi_join, summarise,
## summarise_all, summarise_at, summarise_if, summarize,
## summarize_all, summarize_at, summarize_if, tally, top_n,
## transmute, transmute_all, transmute_at, transmute_if
## The following object is masked from 'package:stats':
##
## filter
library("forcats")
library("lubridate")
##
## Attaching package: 'lubridate'
## The following object is masked from 'package:base':
##
## date
library("anytime")
library("rpivotTable")
library("DT")
Format data
raw_joined <- raw_results %>%
full_join(raw_main, by = c("appid" = "rpmainid"))
## full_join: added 8 rows and added 11 columns (stopdate, stoptime, streetnbr, streetdir, street, …)
format_joined <- raw_joined %>%
rename(birth_year = `Year of birth`,
stop_duration_minutes = Min,
boulder_resident = enfaction,
# stop_date = stopdate,
stop_ID = appid,
item_code = itemcode,
item_description = itemdesc,
activity_type = appkey) %>%
mutate(item_code = as_factor(item_code),
item_description = as_factor(item_description),
activity_type = factor(activity_type,
levels = c("RPT1", "RPT2", "RPT3",
"RPT4", "RPT5", "RPT6",
"RPT7", "1"),
labels = c("Type of Stop",
"Stop Reason",
"Search Conducted",
"Search Authority",
"Contraband Found",
"Result of Stop",
"Type Code Not Disclosed",
"Data error")),
activity_date_time = dmy_hm(addtime),
stop_date = dmy(stopdate),
stop_date_time = dmy_hm(paste(stop_date, stoptime)),
sex = as_factor(sex),
race = factor(race,
levels = c("A", "B", "I", "U", "W"),
labels = c("Asian",
"Black or African American",
"American Indian or Alaskan Native",
"Unknown",
"White")),
ethnic = factor(ethnic,
levels = c("H", "N"),
labels = c("Hispanic", "Non-Hispanic")),
boulder_resident = as_factor(boulder_resident)) %>%
# streetdir not currently used, per data dict
select(-streetdir)
## Warning: 23997 failed to parse.
## Warning: 24108 failed to parse.
## Warning: All formats failed to parse. No formats found.
## mutate: converted 'activity_type' from character to factor (0 new NA)
## mutate: converted 'item_code' from character to factor (0 new NA)
## mutate: converted 'item_description' from character to factor (0 new NA)
## mutate: converted 'sex' from character to factor (0 new NA)
## mutate: converted 'race' from character to factor (0 new NA)
## mutate: converted 'ethnic' from character to factor (0 new NA)
## mutate: converted 'boulder_resident' from character to factor (0 new NA)
## mutate: new variable 'activity_date_time' with 4120 unique values and 61% NA
## mutate: new variable 'stop_date' with 146 unique values and 61% NA
## mutate: new variable 'stop_date_time' with one unique value and 100% NA
## select: dropped one variable (streetdir)
glimpse(format_joined)
## Observations: 39,577
## Variables: 18
## $ activity_type <fct> Type of Stop, Type of Stop, Stop Reason, S…
## $ stop_ID <dbl> 1, 2862, 2862, 2862, 2862, 2862, 2863, 286…
## $ item_code <fct> VEH, VEH, SPEE, *NO, WAR, NO, VEH, RECK, *…
## $ item_description <fct> VEHICLE, VEHICLE, TRAFFIC-SPEEDING, *NO SE…
## $ addtime <chr> "9/26/2018 15:09", "1/1/2018 16:26", "1/1/…
## $ stopdate <chr> NA, "01/01/2018", "01/01/2018", "01/01/201…
## $ stoptime <dbl> NA, 1619, 1619, 1619, 1619, 1619, 1630, 16…
## $ streetnbr <chr> NA, "4561", "4561", "4561", "4561", "4561"…
## $ street <chr> NA, "ARAPAHOE AVE/MACARTHUR DR", "ARAPAHOE…
## $ stop_duration_minutes <dbl> NA, 5, 5, 5, 5, 5, 10, 10, 10, 10, 10, 35,…
## $ sex <fct> NA, M, M, M, M, M, M, M, M, M, M, M, M, M,…
## $ race <fct> NA, White, White, White, White, White, Whi…
## $ ethnic <fct> NA, Non-Hispanic, Non-Hispanic, Non-Hispan…
## $ birth_year <dbl> NA, 1976, 1976, 1976, 1976, 1976, 2000, 20…
## $ boulder_resident <fct> NA, Y, Y, Y, Y, Y, N, N, N, N, N, N, N, N,…
## $ activity_date_time <dttm> NA, 2018-01-01 16:26:00, 2018-01-01 16:26…
## $ stop_date <date> NA, 2018-01-01, 2018-01-01, 2018-01-01, 2…
## $ stop_date_time <dttm> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N…
# Create pivot table
sub_tbl_for_pivot <- format_joined %>%
# remove cols not needed for the pivot table
select(
# can't get date time working right now, so excluding
# -addtime,
# -stopdate,
-stop_date,
-stoptime,
-activity_date_time,
-stop_date_time
) %>%
# rearrange cols
select(stop_ID,
stopdate,
stop_duration_minutes,
streetnbr,
street,
sex,
race,
ethnic,
birth_year,
boulder_resident,
activity_type,
addtime,
item_code,
item_description) %>%
arrange(stopdate, stop_ID)
## select: dropped 4 variables (stoptime, activity_date_time, stop_date, stop_date_time)